{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "# Input file to score; the scoring cell reads it line by line and parses each line as JSON.\n",
        "file_name = \"psy_eval_gpt.txt\""
      ],
      "metadata": {
        "id": "yZ8tPoK2Pgvq"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import warnings\n",
        "\n",
        "# SECURITY: a literal key was previously committed in this cell. Treat that key as\n",
        "# compromised, revoke it with the provider, and supply a fresh one through the\n",
        "# environment instead, e.g. run once per session (value is never saved in the notebook):\n",
        "#   import getpass, os; os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
        "api_key = os.environ.get(\"OPENAI_API_KEY\", \"\")\n",
        "if not api_key:\n",
        "    warnings.warn(\"OPENAI_API_KEY is not set; API requests will not be authenticated.\")"
      ],
      "metadata": {
        "id": "I7Osv8nhBlmL"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def constructScoreTable(role_name, sub_factors, score, current_table=None):\n",
        "    \"\"\"Append one score row to a per-factor score table.\n",
        "\n",
        "    Args:\n",
        "        role_name: Not used by the function; kept so existing calls still work.\n",
        "        sub_factors: Factor names joined by \"_\" (e.g. \"E1_E2\"); empty pieces\n",
        "            and repeated names are ignored.\n",
        "        score: Value stored for every factor named in ``sub_factors``.\n",
        "        current_table: Dict mapping factor name -> list of row values. It is\n",
        "            extended in place. A new empty table is created when omitted.\n",
        "\n",
        "    Returns:\n",
        "        The table, in which every pre-existing column gained one entry:\n",
        "        ``score`` if its factor was named, otherwise the string \"null\".\n",
        "        Factors seen for the first time get a column padded with \"null\" up\n",
        "        to the longest existing column, followed by ``score``.\n",
        "    \"\"\"\n",
        "    # A literal {} default would be created once and shared by every call\n",
        "    # that omits the argument, leaking rows from one call into the next.\n",
        "    if current_table is None:\n",
        "        current_table = {}\n",
        "\n",
        "    # dict.fromkeys de-duplicates while keeping first-seen order, so new\n",
        "    # columns are added in a reproducible order (set() ordering is not).\n",
        "    factors = [name for name in dict.fromkeys(sub_factors.split(\"_\")) if name]\n",
        "\n",
        "    # Measure before appending so new columns line up with the old rows.\n",
        "    row_count = max((len(column) for column in current_table.values()), default=0)\n",
        "\n",
        "    for name, column in current_table.items():\n",
        "        column.append(score if name in factors else \"null\")\n",
        "\n",
        "    for name in factors:\n",
        "        if name not in current_table:\n",
        "            current_table[name] = [\"null\"] * row_count + [score]\n",
        "\n",
        "    return current_table"
      ],
      "metadata": {
        "id": "kb4FhsSw3hd-"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Instruction template placed in front of every report sent to the model.\n",
        "# NOTE(review): get_openai_score concatenates this string with the report as-is,\n",
        "# so the {dimension} placeholders are never filled in by the code in this notebook -\n",
        "# confirm whether a .format(dimension=...) step is missing.\n",
        "prompt = \"\"\"\n",
        "You will read a psychological assessment report. This psychological assessment report\n",
        "\n",
        "assesses whether the subject has a high {dimension} personality.\n",
        "\n",
        "Based on this report, output a json\n",
        "\n",
        "containing two fields: score and reason\n",
        "\n",
        "score is between -5 to 5 points\n",
        "\n",
        "If the subject shows high {dimension} personality in many factors, the score is 5 points\n",
        "\n",
        "If the subject shows high {dimension} personality in a single factor, the score is 2 points\n",
        "\n",
        "If the report is unable to determine the subject's personality, the score is 0 points\n",
        "\n",
        "If the subject shows low {dimension} personality in a single factor, the score is -2 points\n",
        "\n",
        "If the subject shows low {dimension} personality in many factors, the score is -5 points\n",
        "\n",
        "reason is a brief summary of the report\n",
        "\n",
        "Only output the json, do not output any additional information\n",
        "\n",
        "Report:\n",
        "\"\"\""
      ],
      "metadata": {
        "id": "_2oC9XTVABmG"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import requests\n",
        "import json\n",
        "\n",
        "def get_openai_score(prompt, report, timeout=60):\n",
        "    \"\"\"Ask the completions endpoint to score one assessment report.\n",
        "\n",
        "    Uses the notebook-level ``api_key`` for the Authorization header.\n",
        "\n",
        "    Args:\n",
        "        prompt: Instruction text placed in front of the report.\n",
        "        report: Report text appended to the prompt.\n",
        "        timeout: Seconds to wait for the HTTP response.\n",
        "\n",
        "    Returns:\n",
        "        The text of the first returned completion, or an empty string when\n",
        "        the request fails, is answered with a non-200 status, or carries no\n",
        "        completion text. A message is printed in each failure case.\n",
        "    \"\"\"\n",
        "    prompt = prompt + \" \\n \" + report\n",
        "    # Set up the request parameters\n",
        "    headers = {\n",
        "        'Content-Type': 'application/json',\n",
        "        'Authorization': f'Bearer {api_key}'\n",
        "    }\n",
        "\n",
        "    # NOTE(review): five completions are requested (\"n\": 5) but only the first\n",
        "    # one is returned below - confirm whether the other four are needed.\n",
        "    data = {\n",
        "        \"model\": \"text-davinci-003\",\n",
        "        \"prompt\": prompt,\n",
        "        \"max_tokens\": 1024,\n",
        "        \"temperature\": 0.8,\n",
        "        \"n\": 5,\n",
        "        \"logprobs\": 1\n",
        "    }\n",
        "\n",
        "    try:\n",
        "        # Without a timeout a stalled connection would block the cell indefinitely.\n",
        "        response = requests.post('https://api.nsxyw.com/v1/completions', headers=headers, json=data, timeout=timeout)\n",
        "    except requests.RequestException as exc:\n",
        "        print(f\"Completion request failed: {exc}\")\n",
        "        return \"\"\n",
        "\n",
        "    if response.status_code != 200:\n",
        "        # An empty string cannot be parsed as JSON, so the scoring loop skips this report.\n",
        "        print(f\"Completion request returned HTTP {response.status_code}: {response.text[:200]}\")\n",
        "        return \"\"\n",
        "\n",
        "    try:\n",
        "        return response.json()['choices'][0]['text']\n",
        "    except (ValueError, KeyError, IndexError, TypeError) as exc:\n",
        "        # ValueError covers a body that is not valid JSON; the others cover a\n",
        "        # JSON body without the expected choices[0].text entry.\n",
        "        print(f\"Completion response had an unexpected shape: {exc!r}\")\n",
        "        return \"\""
      ],
      "metadata": {
        "id": "bcA_m4tbxgy0"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def extract_subtable(data, prefix):\n",
        "    \"\"\"Return the entries of ``data`` whose key begins with ``prefix``.\n",
        "\n",
        "    A new dict is built in the original key order; ``data`` itself is not modified.\n",
        "    \"\"\"\n",
        "    return {name: column for name, column in data.items() if name.startswith(prefix)}\n"
      ],
      "metadata": {
        "id": "kpJXlrUeMWkD"
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# pandas is needed as soon as these helpers are called; importing it here keeps\n",
        "# the notebook runnable top to bottom on a fresh kernel.\n",
        "import pandas as pd\n",
        "\n",
        "\n",
        "def _numeric_frame(sub_table):\n",
        "    \"\"\"Build a DataFrame from a score table, turning non-numeric cells such as \"null\" into NaN.\"\"\"\n",
        "    return pd.DataFrame({\n",
        "        name: pd.to_numeric(pd.Series(values, dtype=object), errors=\"coerce\")\n",
        "        for name, values in sub_table.items()\n",
        "    })\n",
        "\n",
        "\n",
        "def get_column_avg(sub_table):\n",
        "    \"\"\"Return the mean of each column of a score table.\n",
        "\n",
        "    Args:\n",
        "        sub_table: Dict mapping factor name -> list of scores, where the string\n",
        "            \"null\" marks a missing value.\n",
        "\n",
        "    Returns:\n",
        "        Dict mapping factor name -> mean of its non-missing scores as a float\n",
        "        (NaN when a column has none). An empty table gives an empty dict.\n",
        "    \"\"\"\n",
        "    column_means = _numeric_frame(sub_table).mean()\n",
        "    return {name: float(value) for name, value in column_means.items()}\n",
        "\n",
        "\n",
        "def get_average_row_means(sub_table):\n",
        "    \"\"\"Return the average of the per-row means of a score table.\n",
        "\n",
        "    Each row is first averaged over its non-missing scores; rows with no score\n",
        "    are left out of the final average.\n",
        "\n",
        "    Args:\n",
        "        sub_table: Dict mapping factor name -> list of scores, where the string\n",
        "            \"null\" marks a missing value.\n",
        "\n",
        "    Returns:\n",
        "        The average as a float, or NaN when the table holds no rows or columns.\n",
        "    \"\"\"\n",
        "    df = _numeric_frame(sub_table)\n",
        "    if df.empty:\n",
        "        return float(\"nan\")\n",
        "    return float(df.mean(axis=1).mean())"
      ],
      "metadata": {
        "id": "5ZoSBK1wOYx_"
      },
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def indexing_subtable(sub_table, verbose=False):\n",
        "    \"\"\"Trim every column to the row range that contains at least one score.\n",
        "\n",
        "    Args:\n",
        "        sub_table: Dict mapping factor name -> list of row values, where the\n",
        "            string \"null\" marks a missing value.\n",
        "        verbose: When true, print the first and the last kept row index.\n",
        "\n",
        "    Returns:\n",
        "        A new dict with each column sliced from the first row to the last row\n",
        "        (inclusive) in which any column holds a non-\"null\" value. If no column\n",
        "        holds such a value, every column is returned empty. The input lists\n",
        "        are not modified.\n",
        "    \"\"\"\n",
        "    scored_rows = [\n",
        "        row\n",
        "        for column in sub_table.values()\n",
        "        for row, value in enumerate(column)\n",
        "        if value != \"null\"\n",
        "    ]\n",
        "    if not scored_rows:\n",
        "        # No score anywhere, so there is no row range to keep.\n",
        "        return {key: [] for key in sub_table}\n",
        "\n",
        "    first_row, last_row = min(scored_rows), max(scored_rows)\n",
        "    if verbose:\n",
        "        print(first_row)\n",
        "        print(last_row)\n",
        "    return {key: column[first_row:last_row + 1] for key, column in sub_table.items()}\n"
      ],
      "metadata": {
        "id": "oe0Wb8FxTal6"
      },
      "execution_count": 27,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import json\n",
        "\n",
        "# Key prefixes of the five score groups stored in each result record, in output order.\n",
        "DIMENSION_PREFIXES = (\"N\", \"C\", \"A\", \"O\", \"E\")\n",
        "\n",
        "\n",
        "def _parse_score_json(raw_text):\n",
        "    \"\"\"Return the JSON object with a \"score\" field found in a model reply, or None.\"\"\"\n",
        "    if not raw_text:\n",
        "        return None\n",
        "    # The reply may carry text around the JSON; keep only the outermost {...} span.\n",
        "    start, end = raw_text.find(\"{\"), raw_text.rfind(\"}\")\n",
        "    if start == -1 or end < start:\n",
        "        return None\n",
        "    try:\n",
        "        parsed = json.loads(raw_text[start:end + 1])\n",
        "    except json.JSONDecodeError:\n",
        "        return None\n",
        "    if not isinstance(parsed, dict) or \"score\" not in parsed:\n",
        "        return None\n",
        "    return parsed\n",
        "\n",
        "\n",
        "def _build_role_record(role_name, table):\n",
        "    \"\"\"Summarise one role's score table into the record stored in result.jsonl.\"\"\"\n",
        "    sub_tables = {prefix: extract_subtable(table, prefix) for prefix in DIMENSION_PREFIXES}\n",
        "    record = {\"role_name\": role_name}\n",
        "    for prefix, sub_table in sub_tables.items():\n",
        "        record[prefix] = {\n",
        "            \"score\": get_average_row_means(sub_table),\n",
        "            \"sub_factors\": get_column_avg(sub_table)\n",
        "        }\n",
        "    for prefix, sub_table in sub_tables.items():\n",
        "        record[prefix + \"_table\"] = indexing_subtable(sub_table)\n",
        "    return record\n",
        "\n",
        "\n",
        "def _append_role_record(role_name, table, path=\"result.jsonl\"):\n",
        "    \"\"\"Append one role's record to ``path`` as a single JSON line.\"\"\"\n",
        "    record = _build_role_record(role_name, table)\n",
        "    # utf-8 is stated explicitly because ensure_ascii=False writes non-ASCII text verbatim.\n",
        "    with open(path, \"a\", encoding=\"utf-8\") as jsonl_file:\n",
        "        json.dump(record, jsonl_file, ensure_ascii=False)\n",
        "        jsonl_file.write('\\n')\n",
        "\n",
        "\n",
        "with open(file_name, 'r', encoding='utf-8') as file:\n",
        "    # Read the file line by line; blank lines are dropped because they are not valid JSON\n",
        "    file_lines = [line for line in file if line.strip()]\n",
        "\n",
        "tables_list = []\n",
        "table = {}\n",
        "role_name = json.loads(file_lines[0])[\"role_name\"] if file_lines else None\n",
        "\n",
        "for line in file_lines:\n",
        "    data = json.loads(line)\n",
        "    # NOTE(review): prompt is passed on unformatted, so its {dimension} placeholders\n",
        "    # reach the model literally - confirm whether that is intended.\n",
        "    score_json = _parse_score_json(get_openai_score(prompt, data[\"evaluation\"]))\n",
        "    if score_json is None:\n",
        "        # No usable score in the reply; skip this report.\n",
        "        continue\n",
        "\n",
        "    score = score_json[\"score\"]\n",
        "    sub_factors = data[\"sub_factors\"]\n",
        "    if role_name != data[\"role_name\"]:\n",
        "        # A new role begins: store the finished table, then start an empty one.\n",
        "        tables_list.append(table)\n",
        "        _append_role_record(role_name, table)\n",
        "        table = {}\n",
        "        role_name = data[\"role_name\"]\n",
        "    table = constructScoreTable(role_name, sub_factors, score, table)\n",
        "\n",
        "# The loop only writes a role when the next one starts, so the last role in the\n",
        "# file has to be written here.\n",
        "if table:\n",
        "    tables_list.append(table)\n",
        "    _append_role_record(role_name, table)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8EC6ifu-2jfq",
        "outputId": "39adb4f5-4991-41d1-c23f-510109656115"
      },
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0\n",
            "4\n",
            "26\n",
            "33\n",
            "18\n",
            "25\n",
            "11\n",
            "17\n",
            "5\n",
            "10\n",
            "0\n",
            "4\n",
            "25\n",
            "32\n",
            "18\n",
            "24\n",
            "11\n",
            "17\n",
            "5\n",
            "10\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "\n",
        "# Read result.jsonl line by line; json_data is overwritten on every iteration,\n",
        "# so after the loop it holds only the last record in the file.\n",
        "with open(\"result.jsonl\", 'r', encoding='utf-8') as file:\n",
        "    for line in file:\n",
        "        # Parse the JSON data\n",
        "        json_data = json.loads(line)\n",
        "\n",
        "def drawTable(table):\n",
        "    \"\"\"Render ``table`` as plain text via a pandas DataFrame, leaving out the index column.\"\"\"\n",
        "    return pd.DataFrame(table).to_string(index=False)\n",
        "\n",
        "# Show the E and N score tables of the last record read from result.jsonl.\n",
        "print(drawTable(json_data[\"E_table\"]))\n",
        "print(\"\\n\")\n",
        "print(drawTable(json_data[\"N_table\"]))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ezNXmi6KXkpS",
        "outputId": "7c6a6308-e48e-4a28-be0a-594d2eef6a72"
      },
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "  E2   E6   E4   E1   E3\n",
            "  -2   -2   -2   -2 null\n",
            "null    5    5    5    5\n",
            "null null    2    2    2\n",
            "   2    2 null null    2\n",
            "  -2   -2   -2 null null\n",
            "   2    2    2 null null\n",
            "\n",
            "\n",
            "  N1  N3  N4   N2   N6\n",
            "  -2  -2  -2 null null\n",
            "null  -2  -2   -2   -2\n",
            "null   2   2 null    2\n",
            "null  -5  -5 null   -5\n",
            "null  -2  -2 null   -2\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "_Ck7ARpcYXCd"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}